* ---------------------------------------------------------------------------
* Start from an empty session and stack the decennial county census files
* (1870, 1880, ..., 1940) into one long dataset.
* ---------------------------------------------------------------------------
clear all
cap set more off

* year starts out as an empty column; after each append the rows that still
* have year==. are exactly the rows that were just added, so they get tagged
* with the census year of the file they came from
generate year = .

quietly forvalues census_yr = 1870(10)1940 {
	append using "$path/Replication/raw_data/`census_yr'census_county.dta"
	replace year = `census_yr' if year==.
}

* Restrict to county rows: observations with level==2 are the state aggregates
drop if level==2

* Attach the WWI induction data by state/county.
* keep(master match) discards using-only rows and nogenerate suppresses
* the _merge indicator, so nothing has to be cleaned up afterwards.
sort state county year
merge m:1 state county using "$path/Replication/cleaned_data/WWI inductions by county - clean.dta", ///
	keep(master match) nogenerate

* German count per county-year: row sum of pbgerman and pbwgerm
* (rowtotal treats a missing component as zero), then as a percent of totpop
egen germans = rowtotal(pbgerman pbwgerm)
generate germanshare = germans/totpop*100


* WWI casualty counts, matched on fips; using-only counties are discarded
merge m:1 fips using "$path/Replication/cleaned_data/WWI casualties Army Navy clean - county.dta", ///
	keep(master match) nogenerate

* deaths from accident plus deaths from disease (missing parts count as zero)
egen deaths_acc_dis = rowtotal(dead_acc dead_dis)

* Age distribution, matched on fips; using-only counties are discarded
merge m:1 fips using "$path/Replication/raw_data/age_distribution.dta", ///
	keep(master match) nogenerate

* Build the age-band totals. Each band sums the columns from *_10 up to
* the end column listed below (all_* for the "all" series, am_* for "amm").
* NOTE(review): the mapping of column suffixes 10..37 onto ages 18..45 is
* implied by the variable names only -- confirm against the source file.
local endcol_45 37
local endcol_40 32
local endcol_35 27

foreach upper in 45 40 35 {
	egen age18_`upper'_all = rowtotal(all_10-all_`endcol_`upper'')
	egen age18_`upper'_amm = rowtotal(am_10-am_`endcol_`upper'')
}

* the raw age columns are no longer needed
* (this range relies on the column order of the age file)
drop all_10-am_37

* WWI camps: attach camp-location data by fips and keep only counties that
* are already in the panel
merge m:1 fips using "$path/Replication/raw_data/WWI camp locations.dta" 
drop if _merge==2
drop _merge

* the camp file delivers a variable called mindist; rename it so the next
* merge can bring in its own mindist without a name clash
ren mindist mindist_camp

* war contracts: same treatment as every other merge in this file.
* Using-only rows are dropped so that contract counties that are not in
* the census panel do not enter as observations with missing year/state/county.
merge m:1 fips using "$path/Replication/raw_data/warcontracts_geoloc.dta"
drop if _merge==2
drop _merge

ren mindist mindist_contr

* Gentzkow-Shapiro newspaper data, step 1: look up icpsrfip_1 from the 1910
* neighbor file via gisjoin. newspaper_neighbor flags the rows that found a
* match in that file.
merge m:1 gisjoin using "$path/Replication/raw_data/neighborfile1910.dta", ///
	keepusing(gisjoin icpsrfip_1) keep(master match)
generate newspaper_neighbor = (_merge==3)
drop _merge

* step 2: pull the county newspaper data through icpsrfip_1
merge m:1 icpsrfip_1 using "$path/Replication/raw_data/Gentzkow_Shapiro_newspapers_cnty_clean.dta", ///
	keep(master match) nogenerate

* NPcom data, matched on fips
merge m:1 fips using "$path/Replication/raw_data/npcom_data_2021-7-9.dta", ///
	keep(master match) nogenerate

 

* Casualty rates (casrate_*: dead_all) and accident/disease death rates
* (accrate_*: deaths_acc_dis), each per 100 of the respective age-band
* population. The ratio is multiplied by (year==1910) and the county maximum
* is taken, which spreads the 1910-based value to every year of the county.
* Variables are created in the order cas/acc x 45/40/35 x all/amm.
local ratevars
foreach kind in cas acc {
	if "`kind'" == "cas" {
		local numer dead_all
	}
	else {
		local numer deaths_acc_dis
	}
	foreach upper in 45 40 35 {
		foreach pop in all amm {
			local rate `kind'rate_18`upper'`pop'
			egen `rate' = max((`numer'/age18_`upper'_`pop'*100)*(year==1910)), by(fips)
			local ratevars `ratevars' `rate'
		}
	}
}

* For every rate: cap at 100, compute quintiles on the 1910 cross-section,
* copy the quintile to all years of the county, and set it to 0 in 1910 itself
foreach rate of local ratevars {
	replace `rate' = 100 if `rate'>100 & `rate'!=.
	xtile bin_`rate' = `rate' if year==1910, nq(5)
	egen Q5_`rate' = max(bin_`rate'), by(fips)
	replace Q5_`rate' = 0 if year==1910
	drop bin_`rate'
}

* Coarser groupings of casrate_1845amm: 2, 3 and 4 quantile groups computed
* on the 1910 cross-section and then copied to every year of the county
* (med_ = 2 groups, ter_ = 3 groups, quar_ = 4 groups)
local prefix_2 med
local prefix_3 ter
local prefix_4 quar

forvalues groups = 2/4 {
	xtile c`groups'_casrate_1845amm = casrate_1845amm if year==1910, nq(`groups')
	egen `prefix_`groups''_casrate_1845amm = max(c`groups'_casrate_1845amm), by(fips)
	drop c`groups'_casrate_1845amm
}
 
* Alternative casualty rate: dead_all per 100 of mvote, capped at 100,
* then the county maximum (by state/county) is assigned to all years
generate casrate_temp = dead_all/mvote*100
replace casrate_temp = 100 if casrate_temp>100 & !missing(casrate_temp)
egen casualtyrate = max(casrate_temp), by(state county)

* Draft rate: tot_accepted_num per 100 of age18_45_amm, evaluated in 1910
* only, capped at 100, and spread to all years of the county
generate draftrate_amm1845_tmp = tot_accepted_num/age18_45_amm*100 if year==1910
replace draftrate_amm1845_tmp = 100 if draftrate_amm1845_tmp>100 & !missing(draftrate_amm1845_tmp)
egen draftrate_amm1845 = max(draftrate_amm1845_tmp), by(state county)

* draft rate switched on only for years after 1910
generate draftrate_amm1845post = draftrate_amm1845*(year>1910)



* Panel identifier: one id per state/county pair; the panel is decennial
egen id = group(state county)
xtset id year, delta(10)

* decade-on-decade change in the German share and in the German count
foreach series in germanshare germans {
	generate D`series' = D.`series'
}

* Indicators for a decline between 1910 and 1920 (defined in the 1920 row),
* then copied to every year of the county:
*   negsh  / maxnegsh  -- decline in the German share
*   negnum / maxnegnum -- decline in the German count
local diff_negsh  Dgermanshare
local diff_negnum Dgermans

foreach flag in negsh negnum {
	generate `flag' = `diff_`flag''<0 if year==1920
	egen max`flag' = max(`flag'), by(id)
}

* Placebo groups: counts built as row sums of the listed source columns
* (rowtotal treats missing components as zero)
egen swedes   = rowtotal(pbswedno pbsweden pbwswed)
egen italians = rowtotal(pbitaly pbwitaly)
egen english  = rowtotal(pbenglwa pbenglan pbwengla)

* map each share stem onto its count variable
local count_swede   swedes
local count_italian italians
local count_english english

* population shares in percent
foreach stem in swede italian english {
	generate `stem'share = `count_`stem''/totpop*100
}

* decade-on-decade change in each share
foreach stem in swede italian english {
	generate D`stem'share = D.`stem'share
}



* Outlier removal: observations with casrate_1845amm above 17 are dropped.
* The same outlier was also dropped by Roberts, Evan and Alexandra Burda,
* "Correlates and Consequences of American War Casualties in World War I,"
* University of Minnesota Working Paper, 2018, 2018-3
drop if casrate_1845amm>17 & !missing(casrate_1845amm)

* baseline casualty rate used below
generate casrate = casrate_1845amm

* Quintiles of the alternative casualty rate: values above 7 are set to
* missing, quintiles are computed on the 1920 cross-section, copied to all
* years of the county, and set to 0 in 1910
replace casualtyrate = . if casualtyrate>7

tempvar casbin
xtile `casbin' = casualtyrate if year==1920, nq(5)
egen quincas = max(`casbin'), by(state county)
replace quincas = 0 if year==1910
drop `casbin'


* Residualize the share changes: regress each first-differenced share on
* totpop and year dummies and keep the residual (stored as double)

	* Germans (regression output is displayed)
	regress Dgermanshare totpop i.year
	predict double Dgermanshareres, residuals

	* placebo groups: Swedes, Italians, English (output suppressed)
	foreach stem in swede italian english {
		quietly regress D`stem'share totpop i.year
		predict double D`stem'shareres, residuals
	}

* 1910 population share of each group, copied to every year of the county
	local share_ger germanshare
	local share_swe swedeshare
	local share_ita italianshare

	foreach tag in ger swe ita {
		egen `tag'1910 = max(`share_`tag''*(year==1910)), by(id)
	}

* copy of the casualty rate that is zero in 1910
generate casrate2 = cond(year==1910, 0, casrate)


* fips/year must identify rows uniquely for the 1:1 merges below:
* every observation that belongs to a duplicated fips-year group is removed
duplicates tag fips year, gen(tag)
drop if tag!=0
drop tag

* keep the census version of mfglabor under a separate name before the
* manufacturing file is merged in
ren mfglabor mfglabor1

* tabmiss is a user-written command; report missings in mfgestab before the merge
tabmiss mfgestab

* manufacturing/mining data are keyed on fipsRC; with the update option,
* missing master values are filled from the using file
ren fips fipsRC
merge 1:1 fipsRC year using "$path/Replication/raw_data/MiningManfUScounty1870to1970Fishbackrequest.dta", update
drop if _merge==2
drop _merge



* merge with latitude and longitude information
merge m:1 icpsrfip county using "$path/Replication/raw_data/county_lat_lon.dta"
drop if _merge==2
drop _merge


* merge with newspaper data
* from here on, fips refers to the former icpsrfip
ren icpsrfip fips
drop if fips==.

merge 1:1 fips year using "$path/Replication/raw_data/distance_to_nearest_county_w_newspaper_withNPCOM-2021-7-8.dta", update
drop if _merge==2
* remove the merge indicator like after every other merge; otherwise _merge
* is written into the saved datasets and any later merge on them stops
* with "_merge already defined"
drop _merge

* report missings in mfgestab again, after the merges
tabmiss mfgestab

* firm size: manufacturing workers per establishment
generate firmsize = mfglabor/mfgestab

* Deflate monetary values to the 1910 baseline: each value is multiplied by
* 28 divided by the price index of its own year (1910 rows are left as is).
* source: https://www.minneapolisfed.org/community/financial-and-economic-education/cpi-calculator-information/consumer-price-index-1800
local defl_years 1870 1880 1890 1900 1920 1930 1940
local defl_cpi   38   29   27   25   60.2 50.2 42.1

forvalues k = 1/7 {
	local yr  : word `k' of `defl_years'
	local cpi : word `k' of `defl_cpi'
	foreach money in rmfgwagesth rmfgoutth mfgrms mfgout {
		replace `money' = `money'*(28/`cpi') if year==`yr'
	}
}

* log wages per manufacturing worker
generate lnwages = ln(rmfgwagesth/mfglabor)

* worker productivity: output per manufacturing worker
generate prod = rmfgoutth/mfglabor

* wages per unit of materials, in levels and logs
* NOTE(review): mfgwages is not among the deflated variables above while
* mfgrms is -- confirm that this ratio is meant to combine the two.
generate wagepermat = mfgwages/mfgrms
generate lnwagepermat = ln(wagepermat)

* output per unit of materials, in levels and logs
generate outputpm = rmfgoutth/mfgrms
generate lnoutputpm = ln(outputpm)

* re-declare the decennial panel structure
xtset id year, delta(10)




* post-war indicator (years after 1910)
generate post = year>1910

* urban share and farms per capita, both in percent of totpop
generate urbshare = urb25/totpop*100
generate farmspc = farms/totpop*100

* Pre-war county averages. For every entry below:
*   1. the mean of the expression over the years up to and including 1910
*      is stored in a helper variable (missing in later years),
*   2. the county maximum of the helper copies that mean to all years,
*   3. the result is set to 0 in the 1910 row.
* The three lists are parallel: helper name, output name, expression.

* manufacturing employment share, population, male-to-female ratio
local helpers meanL       meanpop   meanMF
local outputs prewarMFGL  prewarPOP prewarMF
local sources mfglabor/totpop totpop mtot/ftot

forvalues k = 1/3 {
	local helper : word `k' of `helpers'
	local output : word `k' of `outputs'
	local source : word `k' of `sources'
	egen `helper' = mean(`source') if year<=1910, by(id)
	egen `output' = max(`helper'), by(id)
	replace `output' = 0 if year==1910
}

* logs of productivity and firm size
generate lnprod = ln(prod)
generate lnfirmsize = ln(firmsize+1)

* German share, the placebo groups (Swedes, Italians, English), urban share
local helpers meanG       meanS      meanI        meanE        meanU
local outputs prewarGer   prewarSwe  prewarIta    prewarEng    prewarurb
local sources germanshare swedeshare italianshare englishshare urbshare

forvalues k = 1/5 {
	local helper : word `k' of `helpers'
	local output : word `k' of `outputs'
	local source : word `k' of `sources'
	egen `helper' = mean(`source') if year<=1910, by(id)
	egen `output' = max(`helper'), by(id)
	replace `output' = 0 if year==1910
}

* German outflow 1910-20: the residualized share change in the 1920 row if
* it is negative (zero if it is not), copied to all years of the county.
* Years before 1920 are then set to 0 and the sign is flipped so that a
* 1 unit increase in outflow means a 1 p.p. increase in outflow.
generate outflow_temp = Dgermanshareres*(Dgermanshareres<0) if year==1920
egen outflow = max(outflow_temp), by(id)
replace outflow = cond(year<1920, 0, abs(outflow))

* Same construction for the placebo groups (Swedish, Italian and English
* outflow from 1910-20)
	local resid_swedes   Dswedeshareres
	local resid_italians Ditalianshareres
	local resid_english  Denglishshareres

	foreach grp in swedes italians english {
		generate outflow_temp_`grp' = `resid_`grp''*(`resid_`grp''<0) if year==1920
		egen outflow_`grp' = max(outflow_temp_`grp'), by(id)
		replace outflow_`grp' = cond(year<1920, 0, abs(outflow_`grp'))
	}
	
	


* Log manufacturing output with a missing-value flag: Pmiss marks rows where
* the log is missing, then the missing logs are set to 0
gen lnrmfgoutth2 = ln(rmfgoutth)
gen Pmiss = lnrmfgoutth2==.
replace lnrmfgoutth2 = 0 if lnrmfgoutth2==.
ren lnrmfgoutth2 lnmfgout


* alternative output measure: fill missing mfgout from rmfgoutth, then log
replace mfgout = rmfgoutth if rmfgoutth!=. & mfgout==.
gen lnmfgoutB = ln(mfgout)

* log materials with a missing-value flag.
* The flag has to be created BEFORE the missings are overwritten with 0,
* otherwise it is 0 for every observation (same order as Pmiss, Fmiss, Emiss).
gen lnmat = ln(mfgrms+1)
gen Mmiss = lnmat==.
replace lnmat = 0 if lnmat==.

* firm size: flag missings, then set them to 0
gen Fmiss = firmsize==.
replace firmsize = 0 if firmsize==.

* number of establishments: flag missings, then set them to 0
gen Emiss = mfgestab==.
replace mfgestab = 0 if mfgestab==.

* quadratic in year
gen year2 = year*year

gen lnmfgestab = ln(1+mfgestab)

gen lnwkr = ln(1+mfglabor)

gen lntotpop = ln(totpop)

* indicator for the eleven states listed here
gen south = statename=="Alabama"  | statename=="Delaware" 	 | statename=="Florida" 		| statename=="Georgia" 		  | statename=="Louisiana" 	| ///
			statename=="Maryland" | statename=="Mississippi" | statename=="North Carolina"  | statename=="South Carolina" | statename=="Texas" 		| ///
			statename=="Virginia"

gen southpost = south*post

gen prewarGersouthpost = southpost*prewarGer

* drop some outliers (where the number of Germans goes from a handful to 1000+ in one decade):
* 1890 rows whose German share rose by more than 5 points
drop if Dgermanshare>5 & year==1890 & Dgermanshare!=.
	

* store the long-run panel
compress
save "$path/Replication/cleaned_data/county_panel_longrun.dta", replace




* globals w controls for OLS and IV
global controls "c.prewarGer i.year draftrate_amm1845post c.prewarPOP#1.post c.prewarMFGL#1.post c.prewarMF#1.post c.prewarurb#1.post"

* w time varying pop control
global controls2 "c.prewarGer i.year draftrate_amm1845post c.prewarPOP#1.post c.prewarMFGL#1.post c.prewarMF#1.post totpop mtot c.prewarurb#1.post"


* generate sample: keep only observations that enter BOTH estimations below
* (reghdfe and ivreghdfe are user-written commands), years after 1890,
* county fixed effects, standard errors clustered by county
qui reghdfe lnwages outflow $controls if year>1890 , a(id) cluster(id)
gen S1 = e(sample)==1

qui ivreghdfe lnmfgestab (outflow= 5.Q5_casrate_1845amm#1.post) $controls2 if year>1890 , a(id) cluster(id)
gen S2 = e(sample)==1

gen S = S1==1 & S2==1
keep if S==1
drop S1 S2

* dummy for the top quintile of the alternative casualty rate
gen quincas5 = quincas==5
* rescale the pre-war manufacturing employment share by a factor of 100
replace prewarMFGL = prewarMFGL*100
* casualty rate, defined only where Q5_casrate_1845amm equals 5
gen casrate5 = casrate_1845amm if Q5_casrate_1845amm==5

* log manufacturing output per capita
gen lnmfgoutpc = ln(1+mfgout/totpop)
* log manufacturing output per establishment: the ratio has to be formed
* inside the log (ln(mfgout)/mfgestab would divide the log by the firm count).
* mfgestab==0 gives a missing value.
gen lnmfgoutperfirm = ln(mfgout/mfgestab)





* 1920 values of the three NPcom share variables (missing in all other years)
foreach term in enemy hun tar {
	generate `term'_share_npcom20 = `term'_share_npcom if year==1920
}

* Variable labels (they contain LaTeX markup).
* NOTE(review): "taring" in the last label looks like a typo for "tarring";
* the label text is left untouched here because it is part of the output.
label variable germanshare          "Share of German-born individuals"
label variable casrate_1845amm      "WWI Casualty Rate"
label variable Q5_casrate_1845amm   "Dummy Top Casualty Quintile"
label variable casrate5             "Casualty Rate in the Top Quintile"
label variable draftrate_amm1845    "WWI Draft Rate"
label variable prewarGer            "Pre-WWI German Share"
label variable prewarPOP            "Pre-WWI Population"
label variable prewarurb            "Pre-WWI Urbanization Rate"
label variable prewarMF             "Pre-WWI Male-to-Female Ratio"
label variable prewarMFGL           "Pre-WWI Share of Manufacturing Empl."
label variable lnwages              "Log Per Capita Wages in Manufacturing"
label variable outflow              "\% Outflow of German Population, 1910-20"
label variable enemy_share_npcom20  "\% pages calling Germans \textit{enemies}"
label variable hun_share_npcom20    "\% pages calling Germans \textit{huns}"
label variable tar_share_npcom20    "\% pages mentioning taring and feathering"


* wages per manufacturing worker (rmfgwagesth scaled by 1000)
generate wages = rmfgwagesth*1000/mfglabor

* county maximum of outflow, set to 0 in 1910
egen max_outflow = max(outflow), by(id)
replace max_outflow = 0 if year==1910

* pre-war controls interacted with the post-war indicator
foreach ctrl of varlist prewarPOP prewarMFGL prewarMF prewarurb {
	generate `ctrl'post = `ctrl'*(post==1)
}

* top casualty quintile interacted with the post-war indicator
generate quin5post = (Q5_casrate_1845amm==5)*(post==1)

* per-capita manufacturing labor (levels and logs) and establishments
generate mfglaborpc = mfglabor/totpop
generate lnmfglaborpc = ln(mfglaborpc)
generate mfgestabpc = mfgestab/totpop

* inverse-distance weight; any existing variable called weight is replaced
capture drop weight
generate weight = 1/mindist

* treatment and instrument: both hold the same interaction of the top
* casualty quintile with the post-war indicator
generate treat = 5.Q5_casrate_1845amm#1.post
generate IV = treat

* save outflow data at the county level as separate data file for the
* individual census data: one row per county (the 1920 row), fips and outflow only
preserve
	keep if year==1920
	keep fips outflow
	compress
	save "$path/Replication/cleaned_data/outflow_from_cnty_cens.dta", replace
restore

* store the short-run panel
compress
save "$path/Replication/cleaned_data/county_panel_shortrun.dta", replace

